/*
Construct the stacked sample.
*/

cap program drop stack_weight_sample
* stack_weight_sample: build one reweighted sub-sample per treatment cohort
* and stack them into a single dataset that replaces the data in memory.
*
* Arguments
*   varlist            variables that (together with male, year and the value
*                      of agevar() two years before the cohort year) define the
*                      cells within which controls are reweighted to the treated
*   agevar()           variable whose value in year (cohort - 2) is matched on
*   matchvar_m2()      optional extra variable, also matched on at (cohort - 2)
*   first_cohort()     first cohort year to build
*   last_cohort()      last cohort year to build
*   min_event()        first event time kept; must be <= -2 because event
*                      time -2 is the reference period
*   max_event()        last event time kept
*   outcomes()         outcome variables carried into the stacked data
*   no_match_reweight  skip the second-stage reweighting that is otherwise
*                      applied when matchvar_m2() is given
*   control()          control group: "never", "parents", "parents_m10";
*                      any other value (including empty) selects the default
*                      "first child at least 10 years earlier, or no child"
*   min_age()          minimum firm_age required in year (cohort - 1);
*                      defaults to 1
*
* The input data must contain lnr, lfirm, year, male, age, cohort_id,
* currently_own, firm_age, operating_profits, year_first_child, edlevel*,
* ind_cat* (and year_last_child when control("parents_m10") is used).
*
* Result: one row per lnr/lfirm/year/cohort with treated, X_wt, cohort_id,
* event-time dummies et_c<cohort>_{m#,p#} and et_treat_c<cohort>_{m#,p#},
* existence dummies exist_c<cohort>_m# and exist_treat_c<cohort>_m#, and
* cohort dummies dcohort<cohort> and treated_cohort<cohort>. The dummies for
* the reference period (m2) are dropped at the end.
program define stack_weight_sample 
    version 18.0
    syntax varlist, ///
        agevar(varlist) ///
	[matchvar_m2(varlist)] ///
        first_cohort(int) ///
	last_cohort(int) ///
	min_event(int) ///
	max_event(int) ///
	outcomes(varlist) ///
	[no_match_reweight] ///
	[control(string)] ///
	[min_age(numlist int)]

    * fail early with a clear message instead of a later
    * "variable not found" error
    if (`min_event' > -2) {
        di as error "min_event() must be <= -2 (event time -2 is the reference period)"
        exit 198
    }
    if (`first_cohort' > `last_cohort') {
        di as error "first_cohort() must not exceed last_cohort()"
        exit 198
    }

    if ("`min_age'" == "") {
        local min_age = 1 // require firm to be at least 1 year old by t=-1
    }

    * -syntax- treats an option whose name starts with "no" as an
    * "optionally off" option and returns it in the macro named without the
    * leading "no". Check both possible macro names so the flag is honoured
    * however it was parsed.
    local skip_reweight "`no_match_reweight'`_match_reweight'"

    forvalues year = `first_cohort'/`last_cohort' {
      di "Constructing weighted sample for cohort `year'..."
      quietly {
      local year_m1 = `year' - 1 // which period to require ownership
      local year_m2 = `year' - 2 // reference period
      local year_m10 = `year' - 10 // for control group

      * condition on the children of potential controls; this is the only
      * part of the sample definition that differs across control groups
      if ("`control'" == "never") {
          local child_cond "(year_first_child == .)"
      }
      else if ("`control'" == "parents") {
          local child_cond "(year_first_child <= `year_m10')"
      }
      else if ("`control'" == "parents_m10") {
          local child_cond "(year_last_child <= `year_m10' | year_first_child == .)"
      }
      else {
          local child_cond "(year_first_child <= `year_m10' | year_first_child == .)"
      }

      * keep if the person owns a firm in t=-1 (report a balance sheet)
      preserve

      * a person-firm pair is a control if, in year (cohort - 1), it satisfies
      * every condition below (the product of the indicators is 1)
      bys lnr lfirm : egen sample`year' = ///
          max((currently_own == 1) * ///
              (cohort_id <= `year_m10') * ///
              `child_cond' * ///
              (year == `year_m1') * ///
              (firm_age >= `min_age') * ///
              (operating_profits != .))

      keep if sample`year' == 1 | cohort_id == `year'

      * require non-missing profits in the reference period (cohort - 2)
      bys lnr lfirm : egen exist_m2 = ///
          max((year == `year_m2') * (operating_profits != .))
      keep if exist_m2 == 1

      * determine weighting age based on cohort - 2
      bys lnr lfirm : egen age_cohort = max(`agevar' * (year == `year_m2'))

      * match (optionally) on another condition in cohort - 2
      if ("`matchvar_m2'" != "") {
          bys lnr lfirm : egen extra_match_X = max(`matchvar_m2' * (year == `year_m2'))
      }
      else {
          gen extra_match_X = 0
      }

      * weight each individual equally: split a weight of one across the
      * firms a person has in a given year
      bys lnr year : egen n_firms = total(1)
      gen own_wt = 1 / n_firms
      gen treated = (cohort_id == `year')

      * get the estimation weights: treated and control mass per cell
      bys male year `varlist' age_cohort extra_match_X: ///
          egen wt_X_treat = total(own_wt * treated)
      bys male year `varlist' age_cohort extra_match_X: ///
          egen wt_X_control = total(own_wt * (1 - treated))

      * if wt_X_control = 0, we don't have a control match (uncommon)
      keep if wt_X_control != 0
      * if wt_X_treat = 0, the controls aren't useful
      keep if wt_X_treat != 0

      * treated keep their own weight; controls are scaled so that the
      * control mass in each cell equals the treated mass
      gen X_wt = own_wt if treated == 1
      replace X_wt = own_wt * wt_X_treat / wt_X_control if treated == 0

      * if extra match variable specified, reweight again so that distribution
      * of sectors is the same in both samples (unless no_match_reweight
      * was requested)
      if ("`matchvar_m2'" != "" & "`skip_reweight'" == "") {
          bys male year `varlist': ///
              egen wt_ind_treat = total(own_wt * treated)
          bys male year: ///
              egen wt_ind_treat_sum = total(own_wt * treated)
          bys male year `varlist' extra_match_X: ///
              egen wt_ind_treat_extra = total(own_wt * treated)
          bys male year extra_match_X: ///
              egen wt_ind_treat_extra_sum = total(own_wt * treated)

          replace X_wt = ///
              X_wt * (wt_ind_treat / wt_ind_treat_sum) / ///
                     (wt_ind_treat_extra / wt_ind_treat_extra_sum)
      }

      drop wt_* own_wt n_firms exist_m2 age_cohort

      * construct the auxiliary variables
      * these identify existence
      gen event_time = year - `year'
      keep if inrange(event_time, `min_event', `max_event')
      forvalues i = `min_event'/-1 {
          local ia = abs(`i')
          gen et_c`year'_m`ia' = (event_time == `i')
          gen et_treat_c`year'_m`ia' = (event_time == `i' & treated == 1)
          if `i' <= -2 {
              * 1 for every row of a person-firm observed at this event time
              bys lnr lfirm : egen exist_c`year'_m`ia' = max(et_c`year'_m`ia')
              bys lnr lfirm : egen exist_treat_c`year'_m`ia' = max(et_treat_c`year'_m`ia')
          }
      }
      forvalues i = 0/`max_event' {
          gen et_c`year'_p`i' = (event_time == `i')
          gen et_treat_c`year'_p`i' = (event_time == `i' & treated == 1)
      }

      keep lnr lfirm year male treated X_wt et_* exist_* firm_age ///
          edlevel* ind_cat* age currently_own `varlist' `outcomes' `agevar' ///
          extra_match_X `matchvar_m2'

      if ("`matchvar_m2'" == "") {
          drop extra_match_X
      }

      * from here on cohort_id identifies the stack (the treatment cohort
      * this sub-sample was built for)
      gen cohort_id = `year'

      tempfile stack_`year'
      save `stack_`year''
      }
      restore
    }

    di "Construction complete. Stacking files."
    * stack across the cohort-level files
    quietly {
    forvalues year = `first_cohort'/`last_cohort' {
        if (`year' == `first_cohort') {
            use `stack_`year'', clear
        }
        else {
            append using `stack_`year''
        }
    }
    }

    di "Cleaning up stacked data."
    * replace zeros for years outside of each cohort, add cohort dummies
    * (after append, dummies of other cohorts are missing, not 0)
    quietly {
    forvalues year = `first_cohort'/`last_cohort' {
      gen treated_cohort`year' = (cohort_id == `year') * treated
      gen dcohort`year' = (cohort_id == `year')

      forvalues i = `min_event'/-1 {
          local ia = abs(`i')
          replace et_c`year'_m`ia' = 0 if cohort_id != `year'
          replace et_treat_c`year'_m`ia' = 0 if cohort_id != `year'
          if `i' <= -2 {
              * note: all firms exist in -2
              replace exist_c`year'_m`ia' = 0 if cohort_id != `year'
              replace exist_treat_c`year'_m`ia' = 0 if cohort_id != `year'
          }
      }
      forvalues i = 0/`max_event' {
          replace et_c`year'_p`i' = 0 if cohort_id != `year'
          replace et_treat_c`year'_p`i' = 0 if cohort_id != `year'
      }
    }

    * drop the variables which are always 0: event times that fall after
    * the last calendar year present in the stacked data
    sum year
    local final_year = r(max)
    forvalues year = `first_cohort'/`last_cohort' {
      forvalues i = 1/`max_event' {
          if `year' + `i' > `final_year' {
              drop et_c`year'_p`i'
              drop et_treat_c`year'_p`i'
          }
      }
    }

    * drop the reference period (m2)
    drop et_*_m2
    drop exist_*_m2 
    }

end
